# Initial imports
import pandas as pd
import hvplot.pandas
from path import Path
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
# Load data
# Read the cryptocurrency CSV, using its first column as the row index,
# then preview the first ten rows.
file_path = "Resources/crypto_data.csv"
crypto_df = pd.read_csv(file_path, index_col=[0])
crypto_df.head(10)
| CoinName | Algorithm | IsTrading | ProofType | TotalCoinsMined | TotalCoinSupply | |
|---|---|---|---|---|---|---|
| 42 | 42 Coin | Scrypt | True | PoW/PoS | 4.199995e+01 | 42 |
| 365 | 365Coin | X11 | True | PoW/PoS | NaN | 2300000000 |
| 404 | 404Coin | Scrypt | True | PoW/PoS | 1.055185e+09 | 532000000 |
| 611 | SixEleven | SHA-256 | True | PoW | NaN | 611000 |
| 808 | 808 | SHA-256 | True | PoW/PoS | 0.000000e+00 | 0 |
| 1337 | EliteCoin | X13 | True | PoW/PoS | 2.927942e+10 | 314159265359 |
| 2015 | 2015 coin | X11 | True | PoW/PoS | NaN | 0 |
| BTC | Bitcoin | SHA-256 | True | PoW | 1.792718e+07 | 21000000 |
| ETH | Ethereum | Ethash | True | PoW | 1.076842e+08 | 0 |
| LTC | Litecoin | Scrypt | True | PoW | 6.303924e+07 | 84000000 |
# Report how many missing values each column contains.
# The per-column counts are computed in a single pass and then printed.
for column, null_count in crypto_df.isnull().sum().items():
    print(f"Column {column} has {null_count} null values")
Column CoinName has 0 null values Column Algorithm has 0 null values Column IsTrading has 0 null values Column ProofType has 0 null values Column TotalCoinsMined has 508 null values Column TotalCoinSupply has 0 null values
# Restrict the dataset to coins whose IsTrading flag is True.
is_trading = crypto_df["IsTrading"].eq(True)
crypto_df = crypto_df[is_trading]
print(crypto_df.shape)
crypto_df.head(10)
(1144, 6)
| CoinName | Algorithm | IsTrading | ProofType | TotalCoinsMined | TotalCoinSupply | |
|---|---|---|---|---|---|---|
| 42 | 42 Coin | Scrypt | True | PoW/PoS | 4.199995e+01 | 42 |
| 365 | 365Coin | X11 | True | PoW/PoS | NaN | 2300000000 |
| 404 | 404Coin | Scrypt | True | PoW/PoS | 1.055185e+09 | 532000000 |
| 611 | SixEleven | SHA-256 | True | PoW | NaN | 611000 |
| 808 | 808 | SHA-256 | True | PoW/PoS | 0.000000e+00 | 0 |
| 1337 | EliteCoin | X13 | True | PoW/PoS | 2.927942e+10 | 314159265359 |
| 2015 | 2015 coin | X11 | True | PoW/PoS | NaN | 0 |
| BTC | Bitcoin | SHA-256 | True | PoW | 1.792718e+07 | 21000000 |
| ETH | Ethereum | Ethash | True | PoW | 1.076842e+08 | 0 |
| LTC | Litecoin | Scrypt | True | PoW | 6.303924e+07 | 84000000 |
# Discard every coin whose Algorithm entry is missing.
has_algorithm = crypto_df["Algorithm"].notna()
crypto_df = crypto_df[has_algorithm]
print(crypto_df.shape)
crypto_df.head(10)
(1144, 6)
| CoinName | Algorithm | IsTrading | ProofType | TotalCoinsMined | TotalCoinSupply | |
|---|---|---|---|---|---|---|
| 42 | 42 Coin | Scrypt | True | PoW/PoS | 4.199995e+01 | 42 |
| 365 | 365Coin | X11 | True | PoW/PoS | NaN | 2300000000 |
| 404 | 404Coin | Scrypt | True | PoW/PoS | 1.055185e+09 | 532000000 |
| 611 | SixEleven | SHA-256 | True | PoW | NaN | 611000 |
| 808 | 808 | SHA-256 | True | PoW/PoS | 0.000000e+00 | 0 |
| 1337 | EliteCoin | X13 | True | PoW/PoS | 2.927942e+10 | 314159265359 |
| 2015 | 2015 coin | X11 | True | PoW/PoS | NaN | 0 |
| BTC | Bitcoin | SHA-256 | True | PoW | 1.792718e+07 | 21000000 |
| ETH | Ethereum | Ethash | True | PoW | 1.076842e+08 | 0 |
| LTC | Litecoin | Scrypt | True | PoW | 6.303924e+07 | 84000000 |
# IsTrading has already been used as a filter, so drop the column.
crypto_df = crypto_df.drop(columns="IsTrading")
print(crypto_df.shape)
crypto_df.head(10)
(1144, 5)
| CoinName | Algorithm | ProofType | TotalCoinsMined | TotalCoinSupply | |
|---|---|---|---|---|---|
| 42 | 42 Coin | Scrypt | PoW/PoS | 4.199995e+01 | 42 |
| 365 | 365Coin | X11 | PoW/PoS | NaN | 2300000000 |
| 404 | 404Coin | Scrypt | PoW/PoS | 1.055185e+09 | 532000000 |
| 611 | SixEleven | SHA-256 | PoW | NaN | 611000 |
| 808 | 808 | SHA-256 | PoW/PoS | 0.000000e+00 | 0 |
| 1337 | EliteCoin | X13 | PoW/PoS | 2.927942e+10 | 314159265359 |
| 2015 | 2015 coin | X11 | PoW/PoS | NaN | 0 |
| BTC | Bitcoin | SHA-256 | PoW | 1.792718e+07 | 21000000 |
| ETH | Ethereum | Ethash | PoW | 1.076842e+08 | 0 |
| LTC | Litecoin | Scrypt | PoW | 6.303924e+07 | 84000000 |
# Drop every row that contains a null in any column.
crypto_df = crypto_df.dropna(axis=0, how="any")
print(crypto_df.shape)
crypto_df.head(10)
(685, 5)
| CoinName | Algorithm | ProofType | TotalCoinsMined | TotalCoinSupply | |
|---|---|---|---|---|---|
| 42 | 42 Coin | Scrypt | PoW/PoS | 4.199995e+01 | 42 |
| 404 | 404Coin | Scrypt | PoW/PoS | 1.055185e+09 | 532000000 |
| 808 | 808 | SHA-256 | PoW/PoS | 0.000000e+00 | 0 |
| 1337 | EliteCoin | X13 | PoW/PoS | 2.927942e+10 | 314159265359 |
| BTC | Bitcoin | SHA-256 | PoW | 1.792718e+07 | 21000000 |
| ETH | Ethereum | Ethash | PoW | 1.076842e+08 | 0 |
| LTC | Litecoin | Scrypt | PoW | 6.303924e+07 | 84000000 |
| DASH | Dash | X11 | PoW/PoS | 9.031294e+06 | 22000000 |
| XMR | Monero | CryptoNight-V7 | PoW | 1.720114e+07 | 0 |
| ETC | Ethereum Classic | Ethash | PoW | 1.133597e+08 | 210000000 |
# Keep only coins with a strictly positive number of mined coins.
has_mined_coins = crypto_df["TotalCoinsMined"].gt(0)
crypto_df = crypto_df[has_mined_coins]
print(crypto_df.shape)
crypto_df.head(10)
(532, 5)
| CoinName | Algorithm | ProofType | TotalCoinsMined | TotalCoinSupply | |
|---|---|---|---|---|---|
| 42 | 42 Coin | Scrypt | PoW/PoS | 4.199995e+01 | 42 |
| 404 | 404Coin | Scrypt | PoW/PoS | 1.055185e+09 | 532000000 |
| 1337 | EliteCoin | X13 | PoW/PoS | 2.927942e+10 | 314159265359 |
| BTC | Bitcoin | SHA-256 | PoW | 1.792718e+07 | 21000000 |
| ETH | Ethereum | Ethash | PoW | 1.076842e+08 | 0 |
| LTC | Litecoin | Scrypt | PoW | 6.303924e+07 | 84000000 |
| DASH | Dash | X11 | PoW/PoS | 9.031294e+06 | 22000000 |
| XMR | Monero | CryptoNight-V7 | PoW | 1.720114e+07 | 0 |
| ETC | Ethereum Classic | Ethash | PoW | 1.133597e+08 | 210000000 |
| ZEC | ZCash | Equihash | PoW | 7.383056e+06 | 21000000 |
# Set the coin names aside in their own single-column DataFrame (same index
# as crypto_df) so they can be re-attached after clustering.
cryptoname_df = crypto_df[["CoinName"]].copy()
# NOTE(review): this prints the shape of crypto_df, not cryptoname_df -
# confirm which frame was meant to be reported here.
print(crypto_df.shape)
cryptoname_df.head(5)
(532, 5)
| CoinName | |
|---|---|
| 42 | 42 Coin |
| 404 | 404Coin |
| 1337 | EliteCoin |
| BTC | Bitcoin |
| ETH | Ethereum |
# CoinName is a label, not a feature, so exclude it from the clustering input.
crypto_df = crypto_df.drop(columns="CoinName")
print(crypto_df.shape)
crypto_df.head(10)
(532, 4)
| Algorithm | ProofType | TotalCoinsMined | TotalCoinSupply | |
|---|---|---|---|---|
| 42 | Scrypt | PoW/PoS | 4.199995e+01 | 42 |
| 404 | Scrypt | PoW/PoS | 1.055185e+09 | 532000000 |
| 1337 | X13 | PoW/PoS | 2.927942e+10 | 314159265359 |
| BTC | SHA-256 | PoW | 1.792718e+07 | 21000000 |
| ETH | Ethash | PoW | 1.076842e+08 | 0 |
| LTC | Scrypt | PoW | 6.303924e+07 | 84000000 |
| DASH | X11 | PoW/PoS | 9.031294e+06 | 22000000 |
| XMR | CryptoNight-V7 | PoW | 1.720114e+07 | 0 |
| ETC | Ethash | PoW | 1.133597e+08 | 210000000 |
| ZEC | Equihash | PoW | 7.383056e+06 | 21000000 |
# One-hot encode the two text features; the numeric columns pass through
# unchanged.
text_features = ["Algorithm", "ProofType"]
crypto1_df = pd.get_dummies(data=crypto_df, columns=text_features)
print(crypto1_df.shape)
crypto1_df.head(10)
(532, 98)
| TotalCoinsMined | TotalCoinSupply | Algorithm_1GB AES Pattern Search | Algorithm_536 | Algorithm_Argon2d | Algorithm_BLAKE256 | Algorithm_Blake | Algorithm_Blake2S | Algorithm_Blake2b | Algorithm_C11 | ... | ProofType_PoW/PoS | ProofType_PoW/PoS | ProofType_PoW/PoW | ProofType_PoW/nPoS | ProofType_Pos | ProofType_Proof of Authority | ProofType_Proof of Trust | ProofType_TPoS | ProofType_Zero-Knowledge Proof | ProofType_dPoW/PoW | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 42 | 4.199995e+01 | 42 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 404 | 1.055185e+09 | 532000000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1337 | 2.927942e+10 | 314159265359 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| BTC | 1.792718e+07 | 21000000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ETH | 1.076842e+08 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| LTC | 6.303924e+07 | 84000000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| DASH | 9.031294e+06 | 22000000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| XMR | 1.720114e+07 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ETC | 1.133597e+08 | 210000000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ZEC | 7.383056e+06 | 21000000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
10 rows × 98 columns
# Standardize the encoded features with StandardScaler().
scaler = StandardScaler()
# Learn the per-column scaling parameters, then apply them to the same data.
scaler.fit(crypto1_df)
crypto_scaled = scaler.transform(crypto1_df)
# Show the first five scaled rows as a sanity check.
print(crypto_scaled[:5])
[[-0.11710817 -0.1528703 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.07530656 -0.0433963 -0.06142951 -0.06142951 -0.0433963 -0.0433963 -0.19245009 -0.06142951 -0.09740465 -0.0433963 -0.11547005 -0.07530656 -0.0433963 -0.0433963 -0.15191091 -0.0433963 -0.13118084 -0.0433963 -0.0433963 -0.08703883 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.08703883 -0.08703883 -0.08703883 -0.0433963 -0.13118084 -0.13840913 -0.13840913 -0.0433963 -0.06142951 -0.0433963 -0.07530656 -0.18168574 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.15826614 -0.31491833 -0.0433963 -0.08703883 -0.07530656 -0.06142951 1.38675049 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.39879994 -0.0433963 -0.18168574 -0.0433963 -0.08703883 -0.08703883 -0.10680283 -0.0433963 -0.13118084 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.43911856 -0.0433963 -0.06142951 -0.0433963 -0.0433963 -0.89632016 -0.0433963 -0.0433963 1.42222617 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 ] [-0.09396955 -0.145009 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.07530656 -0.0433963 -0.06142951 -0.06142951 -0.0433963 -0.0433963 -0.19245009 -0.06142951 -0.09740465 -0.0433963 -0.11547005 -0.07530656 -0.0433963 -0.0433963 -0.15191091 -0.0433963 -0.13118084 -0.0433963 -0.0433963 -0.08703883 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.08703883 -0.08703883 -0.08703883 -0.0433963 -0.13118084 -0.13840913 -0.13840913 -0.0433963 -0.06142951 -0.0433963 -0.07530656 -0.18168574 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.15826614 -0.31491833 -0.0433963 -0.08703883 -0.07530656 -0.06142951 1.38675049 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.39879994 -0.0433963 -0.18168574 -0.0433963 -0.08703883 -0.08703883 -0.10680283 -0.0433963 -0.13118084 -0.0433963 
-0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.43911856 -0.0433963 -0.06142951 -0.0433963 -0.0433963 -0.89632016 -0.0433963 -0.0433963 1.42222617 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 ] [ 0.52494561 4.48942416 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.07530656 -0.0433963 -0.06142951 -0.06142951 -0.0433963 -0.0433963 -0.19245009 -0.06142951 -0.09740465 -0.0433963 -0.11547005 -0.07530656 -0.0433963 -0.0433963 -0.15191091 -0.0433963 -0.13118084 -0.0433963 -0.0433963 -0.08703883 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.08703883 -0.08703883 -0.08703883 -0.0433963 -0.13118084 -0.13840913 -0.13840913 -0.0433963 -0.06142951 -0.0433963 -0.07530656 -0.18168574 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.15826614 -0.31491833 -0.0433963 -0.08703883 -0.07530656 -0.06142951 -0.72111026 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.39879994 -0.0433963 5.50400923 -0.0433963 -0.08703883 -0.08703883 -0.10680283 -0.0433963 -0.13118084 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.43911856 -0.0433963 -0.06142951 -0.0433963 -0.0433963 -0.89632016 -0.0433963 -0.0433963 1.42222617 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 ] [-0.11671506 -0.15255998 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.07530656 -0.0433963 -0.06142951 -0.06142951 -0.0433963 -0.0433963 -0.19245009 -0.06142951 -0.09740465 -0.0433963 -0.11547005 -0.07530656 -0.0433963 -0.0433963 -0.15191091 -0.0433963 -0.13118084 -0.0433963 -0.0433963 -0.08703883 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.08703883 -0.08703883 -0.08703883 -0.0433963 -0.13118084 -0.13840913 -0.13840913 -0.0433963 -0.06142951 -0.0433963 -0.07530656 -0.18168574 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.15826614 3.17542648 -0.0433963 -0.08703883 -0.07530656 -0.06142951 
-0.72111026 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.39879994 -0.0433963 -0.18168574 -0.0433963 -0.08703883 -0.08703883 -0.10680283 -0.0433963 -0.13118084 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.43911856 -0.0433963 -0.06142951 -0.0433963 -0.0433963 1.11567277 -0.0433963 -0.0433963 -0.70312305 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 ] [-0.11474682 -0.1528703 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.07530656 -0.0433963 -0.06142951 -0.06142951 -0.0433963 -0.0433963 -0.19245009 -0.06142951 -0.09740465 -0.0433963 -0.11547005 -0.07530656 -0.0433963 -0.0433963 -0.15191091 -0.0433963 7.62306442 -0.0433963 -0.0433963 -0.08703883 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.08703883 -0.08703883 -0.08703883 -0.0433963 -0.13118084 -0.13840913 -0.13840913 -0.0433963 -0.06142951 -0.0433963 -0.07530656 -0.18168574 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.15826614 -0.31491833 -0.0433963 -0.08703883 -0.07530656 -0.06142951 -0.72111026 -0.0433963 -0.0433963 -0.06142951 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.39879994 -0.0433963 -0.18168574 -0.0433963 -0.08703883 -0.08703883 -0.10680283 -0.0433963 -0.13118084 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.07530656 -0.43911856 -0.0433963 -0.06142951 -0.0433963 -0.0433963 1.11567277 -0.0433963 -0.0433963 -0.70312305 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 -0.0433963 ]]
# Using PCA to reduce dimension to three principal components.
# The scaled feature matrix is fitted and projected in one call; the result
# is a NumPy array with one row per coin and three columns.
# NOTE(review): no random_state is set on PCA (KMeans below uses
# random_state=0) - confirm whether exact reproducibility is required here.
pca = PCA(n_components=3)
crypto_pca = pca.fit_transform(crypto_scaled)
crypto_pca
array([[-0.33453058, 1.02245356, -0.49560737],
[-0.3178626 , 1.02274894, -0.49595228],
[ 2.30618856, 1.67632477, -0.60190593],
...,
[ 0.32223237, -2.29307998, 0.36221211],
[-0.13880578, -1.96702498, 0.3980826 ],
[-0.28659074, 0.84775993, -0.25370339]])
# Wrap the three principal components in a DataFrame that shares the coin
# index of cryptoname_df, so the components can later be joined by index.
pcs_df = pd.DataFrame(
    crypto_pca,
    index=cryptoname_df.index,
    columns=["PC 1", "PC 2", "PC 3"],
)
print(pcs_df.shape)
pcs_df.head(10)
(532, 3)
| PC 1 | PC 2 | PC 3 | |
|---|---|---|---|
| 42 | -0.334531 | 1.022454 | -0.495607 |
| 404 | -0.317863 | 1.022749 | -0.495952 |
| 1337 | 2.306189 | 1.676325 | -0.601906 |
| BTC | -0.145326 | -1.353450 | 0.182259 |
| ETH | -0.155134 | -2.013085 | 0.371618 |
| LTC | -0.164206 | -1.101826 | -0.003118 |
| DASH | -0.398211 | 1.157685 | -0.418778 |
| XMR | -0.155237 | -2.206105 | 0.389927 |
| ETC | -0.153575 | -2.013163 | 0.371596 |
| ZEC | -0.138805 | -1.967025 | 0.398083 |
# Gather the data for an elbow curve: fit K-Means for K = 1..10 on the
# principal components and record the inertia of each fitted model.
k = list(range(1, 11))
inertia = []
for i in k:
    # fit() returns the fitted estimator, so it can be bound directly.
    km = KMeans(n_clusters=i, random_state=0).fit(pcs_df)
    inertia.append(km.inertia_)
D:\ProgramData\Anaconda3\envs\mlenv\lib\site-packages\sklearn\cluster\_kmeans.py:882: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=3. f"KMeans is known to have a memory leak on Windows "
# Plot inertia against K; the "elbow" of this line suggests a value for K.
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")
Running K-Means with k=4
# Build a four-cluster K-Means model (fixed seed) and fit it to the
# principal components in one step.
model = KMeans(n_clusters=4, random_state=0).fit(pcs_df)
# Assign each coin to its nearest cluster centre.
predictions = model.predict(pcs_df)
predictions
array([0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1,
1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1,
1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0,
0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1,
0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0,
0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0,
1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0,
0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
1, 1, 1, 0, 3, 0, 1, 1, 1, 0, 3, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1,
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 3, 1, 0, 1, 1, 0, 0,
1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 3, 0, 2, 0, 0,
0, 1, 1, 0])
# Assemble one DataFrame with the original features, the principal
# components and the coin names; all three frames are joined on their
# shared index.
clustered_df = crypto_df.join(pcs_df).join(cryptoname_df)
# Attach the cluster label that the fitted K-Means model gave each coin.
clustered_df["class"] = model.labels_
# Report the shape and preview the combined frame.
print(clustered_df.shape)
clustered_df.head(10)
(532, 9)
| Algorithm | ProofType | TotalCoinsMined | TotalCoinSupply | PC 1 | PC 2 | PC 3 | CoinName | class | |
|---|---|---|---|---|---|---|---|---|---|
| 42 | Scrypt | PoW/PoS | 4.199995e+01 | 42 | -0.334531 | 1.022454 | -0.495607 | 42 Coin | 0 |
| 404 | Scrypt | PoW/PoS | 1.055185e+09 | 532000000 | -0.317863 | 1.022749 | -0.495952 | 404Coin | 0 |
| 1337 | X13 | PoW/PoS | 2.927942e+10 | 314159265359 | 2.306189 | 1.676325 | -0.601906 | EliteCoin | 0 |
| BTC | SHA-256 | PoW | 1.792718e+07 | 21000000 | -0.145326 | -1.353450 | 0.182259 | Bitcoin | 1 |
| ETH | Ethash | PoW | 1.076842e+08 | 0 | -0.155134 | -2.013085 | 0.371618 | Ethereum | 1 |
| LTC | Scrypt | PoW | 6.303924e+07 | 84000000 | -0.164206 | -1.101826 | -0.003118 | Litecoin | 1 |
| DASH | X11 | PoW/PoS | 9.031294e+06 | 22000000 | -0.398211 | 1.157685 | -0.418778 | Dash | 0 |
| XMR | CryptoNight-V7 | PoW | 1.720114e+07 | 0 | -0.155237 | -2.206105 | 0.389927 | Monero | 1 |
| ETC | Ethash | PoW | 1.133597e+08 | 210000000 | -0.153575 | -2.013163 | 0.371596 | Ethereum Classic | 1 |
| ZEC | Equihash | PoW | 7.383056e+06 | 21000000 | -0.138805 | -1.967025 | 0.398083 | ZCash | 1 |
# Draw the coins in principal-component space as a 3D scatter; colour and
# marker symbol both encode the cluster label.
fig = px.scatter_3d(
    data_frame=clustered_df,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    color="class",
    symbol="class",
    hover_name="CoinName",
    hover_data=["Algorithm"],
    width=800,
)
# Place the legend at position x=0, y=1 of the figure.
fig.update_layout(legend={"x": 0, "y": 1})
fig.show()
# Show the tradable cryptocurrencies as an interactive table with the
# columns listed below.
table_columns = [
    "CoinName",
    "Algorithm",
    "ProofType",
    "TotalCoinSupply",
    "TotalCoinsMined",
    "class",
]
clustered_df.hvplot.table(columns=table_columns)
# Print the total number of tradable cryptocurrencies.
# The count is interpolated directly into the f-string; previously the
# f-string had no placeholder and the value was passed as a second print()
# argument. The printed text is unchanged.
print(f"Total Number of Tradable Currencies: {clustered_df['CoinName'].count()}")
Total Number of Tradable Currencies: 532
# Rescale supply and mined totals with MinMaxScaler so both can share the
# axes of the 2D scatter plot below.
X = clustered_df.loc[:, ["TotalCoinSupply", "TotalCoinsMined"]]
supply_scaler = MinMaxScaler()
supply_scaler.fit(X)
X_scaled = supply_scaler.transform(X)
X_scaled
array([[4.20000000e-11, 0.00000000e+00],
[5.32000000e-04, 1.06585544e-03],
[3.14159265e-01, 2.95755135e-02],
...,
[1.40022261e-03, 9.90135079e-04],
[2.10000000e-05, 7.37028150e-06],
[1.00000000e-06, 1.29582282e-07]])
# Put the scaled values back into a DataFrame indexed like clustered_df,
# then attach the coin name and cluster label columns by index.
X_df = (
    pd.DataFrame(
        X_scaled,
        index=clustered_df.index,
        columns=["TotalCoinSupply", "TotalCoinsMined"],
    )
    .join(clustered_df["CoinName"])
    .join(clustered_df["class"])
)
print(X_df.shape)
X_df.head(10)
(532, 4)
| TotalCoinSupply | TotalCoinsMined | CoinName | class | |
|---|---|---|---|---|
| 42 | 4.200000e-11 | 0.000000 | 42 Coin | 0 |
| 404 | 5.320000e-04 | 0.001066 | 404Coin | 0 |
| 1337 | 3.141593e-01 | 0.029576 | EliteCoin | 0 |
| BTC | 2.100000e-05 | 0.000018 | Bitcoin | 1 |
| ETH | 0.000000e+00 | 0.000109 | Ethereum | 1 |
| LTC | 8.400000e-05 | 0.000064 | Litecoin | 1 |
| DASH | 2.200000e-05 | 0.000009 | Dash | 0 |
| XMR | 0.000000e+00 | 0.000017 | Monero | 1 |
| ETC | 2.100000e-04 | 0.000115 | Ethereum Classic | 1 |
| ZEC | 2.100000e-05 | 0.000007 | ZCash | 1 |
# Create a hvplot.scatter plot using x="TotalCoinsMined" and y="TotalCoinSupply".
# Both columns hold the min-max scaled values. Points are grouped by cluster
# label, and the coin name and label are shown on hover.
# The axes were previously swapped relative to the stated intent.
X_df.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    hover_cols=["CoinName", "class"],
    by="class",
)